library(DESeq2)
library(pheatmap)
library(dplyr)
library(dendextend)
library(ggplot2)
#Daniela File path
design_matrix<-read.table('/Users/danielaquijano/Documents/GitHub/Transcriptomics-Final-Project-/source_files/Experimental_Design_TG (1).csv',sep=',',header=TRUE)
head(design_matrix)
#Tasnim File path
#design_matrix<-read.table('/Users/tasnimtabassum/Documents/Transcriptomics_SP22/Experimental_Design_TG.csv',sep=',',header=TRUE)

#head(design_matrix)
rownames(design_matrix)<-design_matrix$Sample
design_matrix$Sample<-NULL

design_matrix
counts_matrix<-read.table("/Users/danielaquijano/Documents/GitHub/Transcriptomics-Final-Project-/Count_Tables/allcounts.csv",sep=',',header=TRUE)
counts_matrix

Because the number after the dot in an Ensembl ID only denotes the version of that gene in a particular annotation release, we remove it so that the IDs are easier to work with in the differential gene expression analysis.

counts_matrix$V1<-gsub("\\..*","",counts_matrix$V1)

counts_matrix
# remove the "V1" from col 1
rownames(counts_matrix)<-counts_matrix$V1
counts_matrix$V1<-NULL

#head(counts_matrix)

counts_matrix<-counts_matrix[,order(colnames(counts_matrix))]
counts_matrix
design_matrix<-design_matrix[order(rownames(design_matrix)),]

design_matrix
NA

design_matrix$Age = factor(design_matrix$Age)

design_matrix$Age
 [1] eight  eight  two    two    eight  eight  two    two    two    eight  eight  eight 
[13] two    two    eight  eight  two    two    two    two    eight  eight  eight  eight 
[25] two    two    six    six    six    six    twelve twelve twelve twelve twelve twelve
[37] twelve twelve six    six    twelve twelve six    six    twelve twelve six    six   
[49] six    six    twelve twelve six    six    twelve twelve six    six    twelve twelve
[61] twelve twelve twelve twelve six    six    six    six    two    two    eight  eight 
Levels: eight six twelve two
dds <- DESeqDataSetFromMatrix(countData = counts_matrix,
                              colData = design_matrix,
                              design = ~ Age+Genotype)
Warning in DESeqDataSet(se, design = design, ignoreRank) :
  some variables in design formula are characters, converting to factors
dds
class: DESeqDataSet 
dim: 46075 72 
metadata(1): version
assays(1): counts
rownames(46075): ENSMUSG00000000001 ENSMUSG00000000003 ... N_noFeature N_unmapped
rowData names(0):
colnames(72): SRR8512301 SRR8512302 ... SRR8512439 SRR8512440
colData names(3): Model Genotype Age
keep <- rowSums(counts(dds)) >= 10
dds <- dds[keep,]
dds <- DESeq(dds)
estimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing
-- replacing outliers and refitting for 13457 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)
estimating dispersions
fitting model and testing
normalizedcounts.matrix <- counts(dds,normalized=T)
vst_dds <- vst(dds)

dists <- dist(t(assay(vst_dds)))
head(vst_dds)
class: DESeqTransform 
dim: 6 72 
metadata(1): version
assays(1): ''
rownames(6): ENSMUSG00000000001 ENSMUSG00000000028 ... ENSMUSG00000000049
  ENSMUSG00000000056
rowData names(43): baseMean baseVar ... replace dispFit
colnames(72): SRR8512301 SRR8512302 ... SRR8512439 SRR8512440
colData names(5): Model Genotype Age sizeFactor replaceable


PCA_Genotype<-plotPCA(vst_dds,intgroup=c("Age"))+labs(title = "PCA of mice of different ages", color = "Group")+coord_fixed(ratio=3)
Coordinate system already present. Adding new coordinate system, which will replace the existing one.
PCA_Genotype

resultsNames(dds)
[1] "Intercept"               "Age_six_vs_eight"        "Age_twelve_vs_eight"    
[4] "Age_two_vs_eight"        "Genotype_rtg4510_vs_J20" "Genotype_WT_vs_J20"     
[7] "Genotype_WT_TG_vs_J20"  
dds$Age
 [1] eight  eight  two    two    eight  eight  two    two    two    eight  eight  eight 
[13] two    two    eight  eight  two    two    two    two    eight  eight  eight  eight 
[25] two    two    six    six    six    six    twelve twelve twelve twelve twelve twelve
[37] twelve twelve six    six    twelve twelve six    six    twelve twelve six    six   
[49] six    six    twelve twelve six    six    twelve twelve six    six    twelve twelve
[61] twelve twelve twelve twelve six    six    six    six    two    two    eight  eight 
Levels: eight six twelve two
# Compare 6 vs 12 months (the contrast below is Age: six vs twelve)
res_1 <- results(dds, contrast = c("Age", "six", "twelve"))
res1_ordered <- res_1[order(res_1$padj),] 
head(res1_ordered)
log2 fold change (MLE): Age six vs twelve 
Wald test p-value: Age six vs twelve 
DataFrame with 6 rows and 6 columns
                    baseMean log2FoldChange     lfcSE      stat      pvalue        padj
                   <numeric>      <numeric> <numeric> <numeric>   <numeric>   <numeric>
ENSMUSG00000095041 1831.0773      -0.917683 0.1154126  -7.95132 1.84528e-15 2.80538e-11
ENSMUSG00000021280   23.9611      -0.859667 0.1199499  -7.16688 7.67235e-13 5.83214e-09
ENSMUSG00000051242  191.0017      -0.421978 0.0618144  -6.82654 8.69846e-12 4.40809e-08
ENSMUSG00000020914   11.8771       1.650347 0.2570395   6.42060 1.35740e-10 5.15915e-07
ENSMUSG00000009376  146.5615       0.625863 0.1145294   5.46465 4.63819e-08 1.31636e-04
ENSMUSG00000027456   19.2858      -1.120057 0.2062910  -5.42950 5.65116e-08 1.31636e-04

Load the mouse annotation and enrichment libraries:

library(biomaRt) #For conversion of transcript IDs to gene ID
library(annotables) #to retrieve grcm38 annotation for mouse genome
library(org.Mm.eg.db) #Mouse genome annotation
library(DOSE)
library(pathview)
library(clusterProfiler)
library(AnnotationHub) 
library(ensembldb)
library(tidyverse)
library(ggnewscale)
# mouse genome load
grcm38

# check which ensgene IDs in the mouse genome df are present in our results df

idx <- grcm38$ensgene %in% rownames(res1_ordered)
# head(idx)

# df with all the ids that are in our df from the mouse genome df
ids <- grcm38[idx, ]

# head(ids)

# remove duplicates

non_duplicates <- which(duplicated(ids$ensgene) == FALSE)
ids <- ids[non_duplicates, ]

#nrow(res1_ordered)

#rownames(res1_ordered)

# entrezID contains only the IDs that are also in our df

ensgeneID= grcm38[grcm38$ensgene %in% rownames(res1_ordered), ]

# entrezID contains only the IDs that are also in our df

entrezID= grcm38[grcm38$ensgene %in% rownames(res1_ordered), ]
# check nrow ensgeneID
head(ensgeneID)

# create a vector of only the ensgeneIDs
ensgene_ID_vector = c(ensgeneID[[1]])

head(ensgene_ID_vector)
[1] "ENSMUSG00000000001" "ENSMUSG00000000028" "ENSMUSG00000000031" "ENSMUSG00000000037"
[5] "ENSMUSG00000000049" "ENSMUSG00000000056"
# create a vector of only the entrezIDs
entrez_ID_vector = c(entrezID[[2]])

# create a vector of only the gene symbols
gene_symbols = subset(grcm38$symbol, grcm38$ensgene %in% rownames(res1_ordered))

# create new df that contains only the ensgeneIDs, lfc and padj
res2= data.frame(log2foldchange= subset(res1_ordered$log2FoldChange, grcm38$ensgene %in% rownames(res1_ordered)))
padj = subset(res1_ordered$padj, grcm38$ensgene %in% rownames(res1_ordered))
res2 = cbind(padj, res2)
res2 = cbind(ensgene_ID_vector, res2)
res2 = cbind(entrez_ID_vector, res2)
res2 = cbind(gene_symbols, res2)


# omit all "na" values
res2 = na.omit(res2)

res2 <- res2[order(res2$padj),] 
head(res2)
## Significant genes is a vector of fold changes where the names are ENTREZ gene IDs. The background set is a vector of all the genes represented on the platform.

# bg entrez contains all the ensgene

allOEgenes = as.character(res2$ensgene_ID_vector)

# sig res entrez contains all the entrezIDs that have padj <0.05
head(res2)

sigOE <- subset(res2, padj< 0.05)

head(sigOE)
# vector of only lfc values
sigOE_genes = as.character(sigOE$ensgene_ID_vector)

head(sigOE_genes)
[1] "ENSMUSG00000000001" "ENSMUSG00000000028" "ENSMUSG00000000037" "ENSMUSG00000000049"
[5] "ENSMUSG00000000056" "ENSMUSG00000000058"
## Run GO enrichment analysis 
ego <- enrichGO(gene = sigOE_genes, 
                universe = allOEgenes,
                keyType = "ENSEMBL",
                OrgDb = org.Mm.eg.db, 
                ont = "ALL", 
                pAdjustMethod = "BH", 
                qvalueCutoff = 0.05, 
                readable = TRUE,
                pool  = TRUE)

                
## Output results from GO analysis to a table
cluster_summary <- data.frame(ego)

#gene_ratio = cluster_summary[order(cluster_summary$pvalue, decreasing = FALSE), ]
#head(gene_ratio)
ego
#
# over-representation test
#
#...@organism    Mus musculus 
#...@ontology    GOALL 
#...@keytype     ENSEMBL 
#...@gene    chr [1:137] "ENSMUSG00000000001" "ENSMUSG00000000028" "ENSMUSG00000000037" ...
#...pvalues adjusted by 'BH' with cutoff <0.05 
#...48 enriched terms found
'data.frame':   48 obs. of  10 variables:
 $ ONTOLOGY   : chr  "CC" "BP" "BP" "BP" ...
 $ ID         : chr  "GO:0098552" "GO:0050680" "GO:0001937" "GO:0040013" ...
 $ Description: chr  "side of membrane" "negative regulation of epithelial cell proliferation" "negative regulation of endothelial cell proliferation" "negative regulation of locomotion" ...
 $ GeneRatio  : chr  "19/137" "10/137" "6/137" "13/137" ...
 $ BgRatio    : chr  "472/13470" "124/13470" "40/13470" "280/13470" ...
 $ pvalue     : num  2.97e-07 5.19e-07 2.86e-06 5.49e-06 7.12e-06 ...
 $ p.adjust   : num  0.00094 0.00094 0.00346 0.00498 0.00516 ...
 $ qvalue     : num  0.000819 0.000819 0.003014 0.004334 0.004492 ...
 $ geneID     : chr  "Gnai3/Cav2/Ngfr/Fer/Gna12/Th/Scnn1g/Glra1/Itgb2/Cdh1/Lck/Itga5/Gm2a/Btn1a1/Icosl/Il12rb1/Car4/Fasl/Il4" "Apoh/Cav2/Ngfr/Cdh1/Pparg/Acvrl1/Wdr77/Sox9/Krit1/Serpinf1" "Apoh/Cav2/Ngfr/Pparg/Acvrl1/Krit1" "Apoh/Ngfr/Wnt3/Gna12/Cdh1/Pparg/Pdgfb/Acvrl1/Adora3/Krit1/Sema4f/Serpinf1/Il4" ...
 $ Count      : int  19 10 6 13 15 6 11 11 11 11 ...
#...Citation
 T Wu, E Hu, S Xu, M Chen, P Guo, Z Dai, T Feng, L Zhou, W Tang, L Zhan, X Fu, S Liu, X Bo, and G Yu.
 clusterProfiler 4.0: A universal enrichment tool for interpreting omics data.
 The Innovation. 2021, 2(3):100141 
## Dotplot 
dotplot(ego, showCategory=35)+theme(text = element_text(size = 1)) +scale_y_discrete(labels=function(x) str_wrap(x, width=40))+ggtitle('Enriched genes when comparing J20 mice at 6 and 12 months')+ theme(plot.title = element_text(size=16))
Scale for 'y' is already present. Adding another scale for 'y', which will replace the
existing scale.

#barplot(ego, showCategory = 20)
options(ggrepel.max.overlaps = Inf)

## To color genes by log2 fold changes, we need to extract the log2 fold changes from our results table creating a named vector
OE_foldchanges <- sigOE$log2foldchange

names(OE_foldchanges) <- sigOE$gene_symbols

## Cnetplot details the genes associated with one or more terms - by default gives the top 1 significant term (by padj)

cnetplot(ego, 
         categorySize="pvalue", 
         showCategory = 5,
         colorEdge = TRUE,
         circular = FALSE,
         node_label = "all",
         cex_category = 1.5,
         cex_gene = 0.75,
         cex_label_category = 1.5,
         cex_label_gene = 0.75,
         shadowtext = "all")+ggtitle('Enriched genes when comparing rtg4510 mice at 2 and 8 months')

NA
NA
NA
NA
library(enrichplot)
ego2 = pairwise_termsim(ego)
emapplot(ego2, showCategory = 20, colorEdge = TRUE)+ggtitle('Enriched Genes when comparing  mice at 6 and 12 months of age')


# Set-up

#BiocManager::install("SPIA")
library(SPIA)

## Significant genes is a vector of fold changes where the names are ENTREZ gene IDs. The background set is a vector of all the genes represented on the platform.

# bg entrez contains all the entrezIDs 

background_entrez <- res2$entrez_ID_vector

# sig res entrez contains all the entrezIDs that have padj <0.05

sig_res_entrez <- res2[which(res2$padj < 0.05), ]

# vector of only lfc values
sig_entrez <- sig_res_entrez$log2foldchange

head(sig_entrez)


# adding entrezIDs as names for the sig entrez
names(sig_entrez) <- sig_res_entrez$entrez_ID_vector

head(sig_entrez)

# remove dups
dups<-unique(names(sig_entrez[which(duplicated(names(sig_entrez)))]))
sig_entrez<-sig_entrez[!(names(sig_entrez) %in% dups)]


#de= as.vector(sig_entrez)

#de = sort(de, decreasing = FALSE)

# this step takes time

spia_result <- spia(de=sig_entrez, all=background_entrez, organism="mmu", plots=FALSE)
write.csv(spia_result, file = "spia_result_J20_age.csv")

# view one record at a time

subset(spia_result, ID == "04727")

head(res1_ordered)
log2 fold change (MLE): Age six vs twelve 
Wald test p-value: Age six vs twelve 
DataFrame with 6 rows and 6 columns
                    baseMean log2FoldChange     lfcSE      stat      pvalue        padj
                   <numeric>      <numeric> <numeric> <numeric>   <numeric>   <numeric>
ENSMUSG00000095041 1831.0773      -0.917683 0.1154126  -7.95132 1.84528e-15 2.80538e-11
ENSMUSG00000021280   23.9611      -0.859667 0.1199499  -7.16688 7.67235e-13 5.83214e-09
ENSMUSG00000051242  191.0017      -0.421978 0.0618144  -6.82654 8.69846e-12 4.40809e-08
ENSMUSG00000020914   11.8771       1.650347 0.2570395   6.42060 1.35740e-10 5.15915e-07
ENSMUSG00000009376  146.5615       0.625863 0.1145294   5.46465 4.63819e-08 1.31636e-04
ENSMUSG00000027456   19.2858      -1.120057 0.2062910  -5.42950 5.65116e-08 1.31636e-04
norm_counts_top_40 = normalizedcounts.matrix[row.names(head(res1_ordered, 40)), ]
nrow(norm_counts_top_40)
head(design_matrix)

annotation_columns<-design_matrix

row.names(annotation_columns) <- colnames(norm_counts_top_40)

library(pheatmap)

tiff("Heatmap_6,12_age.tiff", width = 7, height = 5, units = 'in', res = 300)
pheatmap(norm_counts_top_40, color=colorRampPalette(c("white", "lightpink", "purple4"))(30), scale="row", cluster_cols = T, show_rownames = T,fontsize = 7,fontsize_row = 4, fontsize_col = 4,labels_row = rownames(dists),annotation_col =annotation_columns,main='Differentially Expressed Genes in rtg4510 mice at 2 and 8 months old' )
dev.off()
null device 
          1 
---
title: "R Notebook"
output: html_notebook
---

```{r}

library(DESeq2)
library(pheatmap)
library(dplyr)
library(dendextend)
library(ggplot2)
```

```{r}
# Load the experimental design table (Daniela's local file path); the first
# line of the CSV supplies the column names.
design_matrix <- read.table(
  file = '/Users/danielaquijano/Documents/GitHub/Transcriptomics-Final-Project-/source_files/Experimental_Design_TG (1).csv',
  header = TRUE,
  sep = ','
)
head(design_matrix)
```

```{r}
#Tasnim File path
#design_matrix<-read.table('/Users/tasnimtabassum/Documents/Transcriptomics_SP22/Experimental_Design_TG.csv',sep=',',header=TRUE)

#head(design_matrix)
```

```{r}
# Use the sample identifiers as row names, then drop the now-redundant
# Sample column so only the experimental variables remain.
row.names(design_matrix) <- design_matrix[["Sample"]]
design_matrix[["Sample"]] <- NULL

design_matrix
```

```{r}
# Load the combined count table; the first line of the CSV supplies the
# column names.
counts_matrix <- read.table(
  file = "/Users/danielaquijano/Documents/GitHub/Transcriptomics-Final-Project-/Count_Tables/allcounts.csv",
  header = TRUE,
  sep = ','
)
counts_matrix
```



Because the number after the dot in an Ensembl ID only denotes the version of that gene in a particular annotation release, we remove it so that the IDs are easier to work with in the differential gene expression analysis.
```{r}
# Remove everything from the first "." onward in each gene ID held in V1.
counts_matrix[["V1"]] <- gsub(
  pattern = "\\..*",
  replacement = "",
  x = counts_matrix[["V1"]]
)

counts_matrix
```

```{r}
# Use the gene IDs in V1 as row names, then drop the V1 column so that only
# per-sample count columns remain.
rownames(counts_matrix) <- counts_matrix$V1
counts_matrix$V1 <- NULL

# The table also contains rows such as N_noFeature and N_unmapped. These look
# like read-counting summary rows rather than genes (NOTE(review): confirm
# against the counting pipeline), so they are removed before modelling.
counts_matrix <- counts_matrix[!grepl("^N_", rownames(counts_matrix)), ,
                               drop = FALSE]

#head(counts_matrix)

# Sort the sample columns by name so they line up with the design matrix once
# its rows are sorted the same way; drop = FALSE keeps a data frame even if
# only one sample column is present.
counts_matrix <- counts_matrix[, order(colnames(counts_matrix)), drop = FALSE]
counts_matrix
```

```{r}
# Put the samples (rows) in sorted order of their row names, mirroring the
# column sort applied to the count table.
design_matrix <- design_matrix[order(row.names(design_matrix)), ]

design_matrix

```

```{r}

# Encode Age with explicit, chronological levels. The default alphabetical
# order ("eight", "six", "twelve", "two") makes "eight" the reference level
# and scrambles the order of groups in coefficient names and plot legends.
age_levels <- c("two", "six", "eight", "twelve")
stopifnot(all(design_matrix$Age %in% age_levels))
design_matrix$Age <- factor(design_matrix$Age, levels = age_levels)

design_matrix$Age

# Convert Genotype to a factor up front instead of relying on DESeq2's
# implicit character-to-factor conversion (which emits a warning). The levels
# stay alphabetical, so the reference genotype is unchanged.
design_matrix$Genotype <- factor(design_matrix$Genotype)

# Samples must appear in the same order in both tables, otherwise counts
# would be paired with the wrong metadata.
stopifnot(identical(rownames(design_matrix), colnames(counts_matrix)))

# Model expression as an additive effect of Age and Genotype.
dds <- DESeqDataSetFromMatrix(countData = counts_matrix,
                              colData = design_matrix,
                              design = ~ Age + Genotype)
dds

# Pre-filter: keep only rows with at least 10 reads summed over all samples.
keep <- rowSums(counts(dds)) >= 10
dds <- dds[keep, ]
```


```{r}
# Run the DESeq2 pipeline on the filtered dataset (size factors, dispersion
# estimates, model fitting and testing) and store the fitted object back in dds.
dds <- DESeq(dds)
```

```{r}
# Normalized counts from the fitted dataset; used further down to draw the
# heatmap. TRUE is spelled out because `T` is an ordinary variable that can
# be reassigned.
normalizedcounts.matrix <- counts(dds, normalized = TRUE)

```


```{r}
# Variance-stabilizing transformation of the dataset.
vst_dds <- vst(dds)

# Pairwise distances between samples: the transformed matrix is transposed so
# that samples become rows before dist() is applied.
dists <- vst_dds %>%
  assay() %>%
  t() %>%
  dist()
head(vst_dds)
```


```{r}


# PCA of the variance-stabilized data with samples grouped by Age. Adding
# coord_fixed(ratio = 3) replaces the coordinate system already present on the
# plot returned by plotPCA() (ggplot2 prints a message about this).
PCA_Genotype <- plotPCA(vst_dds, intgroup = "Age") +
  labs(title = "PCA of mice of different ages", color = "Group") +
  coord_fixed(ratio = 3)

PCA_Genotype
```

```{r}
# List the coefficient names of the fitted model.
resultsNames(dds)
```
```{r}
# Show the Age factor (values and levels) stored with the dataset.
dds$Age
```

```{r}
# Compare 6 vs 12 months: Age contrast "six" vs "twelve".
res_1 <- results(dds, contrast = c("Age", "six", "twelve"))

# Sort genes by adjusted p-value, most significant first.
res1_ordered <- res_1[order(res_1[["padj"]]), ]
head(res1_ordered)
```


Load the mouse annotation and enrichment libraries:

```{r}
library(biomaRt) #For conversion of transcript IDs to gene ID
library(annotables) #to retrieve grcm38 annotation for mouse genome
library(org.Mm.eg.db) #Mouse genome annotation
library(DOSE)
library(pathview)
library(clusterProfiler)
library(AnnotationHub) 
library(ensembldb)
library(tidyverse)
library(ggnewscale)
```

```{r}
# Print the grcm38 mouse annotation table.
grcm38

# Flag the annotation rows whose Ensembl gene ID is present in our results.
idx <- grcm38$ensgene %in% rownames(res1_ordered)
# head(idx)

# Annotation rows for the genes found in our results.
ids <- grcm38[idx, ]

# head(ids)

# Keep only the first annotation row for each Ensembl gene ID.
non_duplicates <- which(!duplicated(ids$ensgene))
ids <- ids[non_duplicates, ]
```

```{r}

#nrow(res1_ordered)

#rownames(res1_ordered)

# Annotation rows (from grcm38) for the genes present in our results. Both
# objects hold the same rows; the two names are kept because each is used
# below to pull a different ID column.
ensgeneID <- grcm38[grcm38$ensgene %in% rownames(res1_ordered), ]
entrezID <- grcm38[grcm38$ensgene %in% rownames(res1_ordered), ]
# check nrow ensgeneID
head(ensgeneID)

# Ensembl gene IDs (first column of the annotation table).
ensgene_ID_vector <- c(ensgeneID[[1]])

head(ensgene_ID_vector)


# Entrez IDs, taken from the second column of the annotation table.
entrez_ID_vector <- c(entrezID[[2]])

# Gene symbols for the same annotation rows.
gene_symbols <- subset(grcm38$symbol, grcm38$ensgene %in% rownames(res1_ordered))

# Look up, for every annotation row, the row of res1_ordered that holds the
# same Ensembl ID. The previous code subset the log2FC/padj vectors with a
# logical mask computed over grcm38 rows; that mask does not line up with
# res1_ordered (different length, different order), so statistics were
# attached to the wrong genes.
res_rows <- match(ensgene_ID_vector, rownames(res1_ordered))
padj <- res1_ordered$padj[res_rows]

# One row per annotation entry: symbol, Entrez ID, Ensembl ID, adjusted
# p-value and log2 fold change of the matching gene.
res2 <- data.frame(
  gene_symbols = gene_symbols,
  entrez_ID_vector = entrez_ID_vector,
  ensgene_ID_vector = ensgene_ID_vector,
  padj = padj,
  log2foldchange = res1_ordered$log2FoldChange[res_rows]
)


# Drop rows with a missing value in any column.
res2 <- na.omit(res2)

# Most significant genes first.
res2 <- res2[order(res2$padj), ]
head(res2)
```

```{r}
## Inputs for the GO over-representation test: the significant genes and the
## background set they are compared against, both as Ensembl gene IDs.

# Background: Ensembl IDs of every gene retained in res2.
allOEgenes <- as.character(res2[["ensgene_ID_vector"]])

head(res2)

# Rows of res2 with an adjusted p-value below 0.05.
sigOE <- res2[which(res2[["padj"]] < 0.05), ]

head(sigOE)
# Ensembl IDs of the significant genes.
sigOE_genes <- as.character(sigOE[["ensgene_ID_vector"]])

head(sigOE_genes)

```


```{r}
## Run GO enrichment analysis 
ego <- enrichGO(gene = sigOE_genes, 
                universe = allOEgenes,
                keyType = "ENSEMBL",
                OrgDb = org.Mm.eg.db, 
                ont = "ALL", 
                pAdjustMethod = "BH", 
                qvalueCutoff = 0.05, 
                readable = TRUE,
                pool  = TRUE)

                
## Output results from GO analysis to a table
cluster_summary <- data.frame(ego)

#gene_ratio = cluster_summary[order(cluster_summary$pvalue, decreasing = FALSE), ]
#head(gene_ratio)


```
```{r}
# Print a summary of the GO enrichment result.
ego
```


```{r fig.width=15,fig.height=20}
## Dotplot of up to 35 enriched GO terms; term labels on the y axis are
## wrapped at 40 characters.
dotplot(ego, showCategory = 35) +
  scale_y_discrete(labels = function(x) str_wrap(x, width = 40)) +
  ggtitle('Enriched genes when comparing J20 mice at 6 and 12 months') +
  theme(
    text = element_text(size = 1),
    plot.title = element_text(size = 16)
  )

#barplot(ego, showCategory = 20)
```





```{r fig.width=20,fig.height=20}
# Let ggrepel place every label, however many overlap.
options(ggrepel.max.overlaps = Inf)

## Named vector of log2 fold changes for the significant genes. The names are
## gene symbols because enrichGO() was run with readable = TRUE, so the genes
## in `ego` are labelled by symbol.
OE_foldchanges <- sigOE$log2foldchange

names(OE_foldchanges) <- sigOE$gene_symbols

## Cnetplot: network of the top 5 enriched terms and the genes associated
## with them. foldChange is passed so gene nodes are coloured by log2 fold
## change; the vector was previously built but never handed to the plot.
## The title now matches the contrast analysed here (six vs twelve months).

cnetplot(ego,
         categorySize = "pvalue",
         showCategory = 5,
         foldChange = OE_foldchanges,
         colorEdge = TRUE,
         circular = FALSE,
         node_label = "all",
         cex_category = 1.5,
         cex_gene = 0.75,
         cex_label_category = 1.5,
         cex_label_gene = 0.75,
         shadowtext = "all") +
  ggtitle('Enriched genes when comparing mice at 6 and 12 months of age')
         



```


```{r fig.width=15,fig.height=20}
library(enrichplot)

# Compute pairwise similarity between the enriched terms, then draw the
# enrichment map for 20 of them.
ego2 <- pairwise_termsim(ego)
emapplot(ego2, showCategory = 20, colorEdge = TRUE) +
  ggtitle('Enriched Genes when comparing  mice at 6 and 12 months of age')

```


```{r}

# Set-up

#BiocManager::install("SPIA")
library(SPIA)

## SPIA inputs: a vector of log2 fold changes for the significant genes, named
## by Entrez gene ID, and a background vector of Entrez IDs.

# Background: Entrez IDs of every gene retained in res2.
background_entrez <- res2[["entrez_ID_vector"]]

# Rows of res2 with an adjusted p-value below 0.05.
sig_res_entrez <- subset(res2, padj < 0.05)

# Log2 fold changes of those significant rows (still unnamed here).
sig_entrez <- sig_res_entrez[["log2foldchange"]]

head(sig_entrez)


# Name each fold change by the Entrez ID of its gene.
names(sig_entrez) <- sig_res_entrez[["entrez_ID_vector"]]

head(sig_entrez)

# Drop every Entrez ID that occurs more than once; all of its entries are
# removed, not just the repeats.
dups <- unique(names(sig_entrez)[duplicated(names(sig_entrez))])
sig_entrez <- sig_entrez[!(names(sig_entrez) %in% dups)]


```



```{r}


#de= as.vector(sig_entrez)

#de = sort(de, decreasing = FALSE)

# Run SPIA for mouse ("mmu"): `de` is the Entrez-named vector of log2 fold
# changes for the significant genes and `all` is the background set of
# Entrez IDs. Plotting is switched off. This step takes time.

spia_result <- spia(de=sig_entrez, all=background_entrez, organism="mmu", plots=FALSE)

```

```{r}
# Save the SPIA results table as a CSV file (path is relative to the working
# directory).
write.csv(spia_result, file = "spia_result_J20_age.csv")

```



```{r}

# View one record at a time: show the SPIA result row(s) whose ID column
# equals "04727".

subset(spia_result, ID == "04727")

```

```{r}

head(res1_ordered)
# Normalized counts for the first 40 genes of res1_ordered, i.e. the 40 genes
# with the smallest adjusted p-values.
norm_counts_top_40 <- normalizedcounts.matrix[
  head(rownames(res1_ordered), n = 40),
]
```

```{r}
# Number of genes (rows) selected for the heatmap.
nrow(norm_counts_top_40)
```

```{r}
head(design_matrix)

# Column annotations for the heatmap: one row of sample metadata per column
# of the count matrix. Rows are selected by sample name instead of
# overwriting the row names positionally, so metadata cannot be silently
# attached to the wrong sample if the two orders ever differ.
stopifnot(all(colnames(norm_counts_top_40) %in% rownames(design_matrix)))
annotation_columns <- design_matrix[colnames(norm_counts_top_40), ,
                                    drop = FALSE]

library(pheatmap)
```

```{r}

# Write the heatmap of the top-40 genes to a 7 x 5 inch, 300 dpi TIFF file.
tiff("Heatmap_6,12_age.tiff", width = 7, height = 5, units = 'in', res = 300)
# Rows are scaled per gene and labelled with the row names of the count
# matrix (gene IDs). The former `labels_row = rownames(dists)` argument is
# dropped: `dists` is a dist object, for which rownames() is NULL, so it had
# no effect and only suggested that rows were labelled by sample.
# The title now matches the 6 vs 12 month contrast and the output file name.
pheatmap(
  norm_counts_top_40,
  color = colorRampPalette(c("white", "lightpink", "purple4"))(30),
  scale = "row",
  cluster_cols = TRUE,
  show_rownames = TRUE,
  fontsize = 7,
  fontsize_row = 4,
  fontsize_col = 4,
  annotation_col = annotation_columns,
  main = 'Differentially Expressed Genes in mice at 6 and 12 months old'
)
# Close the TIFF device so the file is flushed to disk.
dev.off()
```
